library(tidyverse)
library(patchwork)
library(fs)
library(haven)
library(wacolors)
library(sf)
library(ggtext)
library(glue)
library(kableExtra)
library(ggplot2)

#### READ DATA ####

file1 <- "data/mrp-ests_by-cd-race_noc.csv"
file2 <- "data/mrp-ests_by-cd-race_replicate.csv"
noc <- read_csv(path(file1)) %>% filter(year == 2020) %>% select(cd, pct_trump, race, p_mrp_twway) %>% 
                                rename(pct_trump_noc = pct_trump, p_mrp_twway_noc = p_mrp_twway)
baseline <- read_csv(path(file2)) %>% filter(year == 2020) %>% select(cd, pct_trump, race, p_mrp_twway)
combined <- merge(noc, baseline, by = c("cd", "race"), all = "inner", all.x = FALSE, all.y = FALSE)

#### avg difference (original - noc) ####

combined$p_mrp_twway_diff <- combined$p_mrp_twway - combined$p_mrp_twway_noc
for (race in c("Black", "Hispanic", "White", "Other")) {
  p <- ggplot(subset(combined, race == race), aes(x = p_mrp_twway_diff)) 
  p <- p + geom_histogram(stat = "density")
  print(p)
  print(race)
}


#### DF by RACE ####
black_data <- subset(combined, race == "Black")
white_data <- subset(combined, race == "White")
hispanic_data <- subset(combined, race == "Hispanic")
other_data <- subset(combined, race == "Other")

#### PLOT FOR EACH GROUP ####
black_p <- ggplot(black_data, aes(x = p_mrp_twway_noc, y = p_mrp_twway)) +
  geom_point() + 
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed")
  geom_smooth(method = "lm", se = FALSE, color = "blue") + 
  annotate("text", x = min(black_data$p_mrp_twway_noc), y = max(black_data$p_mrp_twway),
           label = paste("Correlation coefficient:", round(cor(black_data$p_mrp_twway_noc, black_data$p_mrp_twway), 2)),
           hjust = 0, vjust = 1) + 
  labs(x = "Trump share, contradictors removed", y = "Trump share, original estimates") + 
  theme(panel.grid.major = element_line(color = "gray", size = 0.5),  
        panel.background = element_rect(fill = "white"), 
        axis.line = element_line(color = "gray")) 

print(black_p)
ggsave(plot = black_p, "figures/black_bivariate_plot.pdf")

white_p <- ggplot(white_data, aes(x = p_mrp_twway_noc, y = p_mrp_twway)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  
  annotate("text", x = min(white_data$p_mrp_twway_noc), y = max(white_data$p_mrp_twway),
           label = paste("Correlation coefficient:", round(cor(white_data$p_mrp_twway_noc, white_data$p_mrp_twway), 2)),
           hjust = 0, vjust = 1) +
  labs(x = "Trump share, contradictors removed", y = "Trump share, original estimates") +
  theme(panel.grid.major = element_line(color = "gray", size = 0.5), 
        panel.background = element_rect(fill = "white"), 
        axis.line = element_line(color = "gray")) 

ggsave(plot = white_p, "figures/white_bivariate_plot.pdf")

hispanic_p <- ggplot(hispanic_data, aes(x = p_mrp_twway_noc, y = p_mrp_twway)) +
  geom_point() + 
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +  
  geom_smooth(method = "lm", se = FALSE, color = "blue") + 
  annotate("text", x = min(hispanic_data$p_mrp_twway_noc), y = max(hispanic_data$p_mrp_twway),
           label = paste("Correlation coefficient:", round(cor(hispanic_data$p_mrp_twway_noc, hispanic_data$p_mrp_twway), 2)),
           hjust = 0, vjust = 1) + 
  labs(x = "Trump share, contradictors removed", y = "Trump share, original estimates") +
  theme(panel.grid.major = element_line(color = "gray", size = 0.5), 
        panel.background = element_rect(fill = "white"), 
        axis.line = element_line(color = "gray"))  

print(hispanic_p)
ggsave(plot = hispanic_p, "figures/hispanic_bivariate_plot.pdf")

other_p <- ggplot(other_data, aes(x = p_mrp_twway_noc, y = p_mrp_twway)) +
  geom_point() + 
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") + 
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  
  annotate("text", x = min(other_data$p_mrp_twway_noc), y = max(other_data$p_mrp_twway),
           label = paste("Correlation coefficient:", round(cor(other_data$p_mrp_twway_noc, other_data$p_mrp_twway), 2)),
           hjust = 0, vjust = 0) +  
  labs(x = "Trump share, contradictors removed", y = "Trump share, original estimates") +
  theme(panel.grid.major = element_line(color = "gray", size = 0.5),  
        panel.background = element_rect(fill = "white"), 
        axis.line = element_line(color = "gray"))  
print(p)

print(other_p)
ggsave(plot = other_p, "figures/other_bivariate_plot.pdf")
